package com.spider.spider;

import com.spider.mapper.SpiderMapper;
import com.spider.pojo.Spider;
import com.spider.spider.annotation.JobCrawlerAn;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.*;
import us.codecraft.webmagic.processor.PageProcessor;

import java.util.List;
import java.util.Random;

/**
 * WebMagic {@link PageProcessor} for the rental listings served under {@code hz.ziroom.com}.
 *
 * <p>Pages whose URL matches {@code PAGES_URL} are only mined for further links to crawl.
 * Pages whose URL matches {@code ROOM_URL} are parsed into a {@link Spider} record, which is
 * inserted through {@link SpiderMapper} unless the mapper already returns a record for the
 * same descriptive fields.
 *
 * Created by Ruan on 2017/9/12.
 */

@Component
public class ZiRoomJSProcessor implements PageProcessor {

    private static final String PAGES_URL ="http://hz.ziroom.com/z/nl/.*";
    private static final String ROOM_URL ="http://hz.ziroom.com/z/vr/.*";

    /** Text of the "balcony" span that this crawler interprets as "no private balcony". */
    private static final String NO_BALCONY_MARKER = "免物业费";

    /** Shared generator used to pick a user agent; avoids allocating a new Random per page. */
    private static final Random RANDOM = new Random();

    private final SpiderMapper spiderMapper;

    private final Site site = Site.me().setRetryTimes(3).setSleepTime(500).setTimeOut(5000)
            .setUserAgent(randomUserAgent());

    ZiRoomJSProcessor(SpiderMapper spiderMapper) {
        this.spiderMapper = spiderMapper;
    }

    /**
     * Handles one downloaded page: queues follow-up links for listing pages, or extracts and
     * stores the room for detail pages. Pages matching neither URL pattern are ignored.
     *
     * <p>The method is {@code synchronized}; presumably this keeps the look-up-then-insert
     * sequence from racing when the crawler runs several threads (NOTE(review): confirm).
     *
     * @param page the downloaded page handed over by WebMagic
     */
    @Override
    public synchronized void process(Page page) {
        // Pick a different user agent for the shared Site on every processed page.
        site.setUserAgent(randomUserAgent());

        if (page.getUrl().regex(PAGES_URL).match()) {
            queueListingLinks(page);
        } else if (page.getUrl().regex(ROOM_URL).match()) {
            storeRoom(page);
        }
    }

    @Override
    public Site getSite() {
        return site;
    }

    /** Returns a randomly chosen entry of {@code JobCrawlerAn.USER_AGENTS}. */
    private static String randomUserAgent() {
        return JobCrawlerAn.USER_AGENTS[RANDOM.nextInt(JobCrawlerAn.USER_AGENTS.length)];
    }

    /** Evaluates {@code xpath} against the page HTML and returns the first match, or null. */
    private static String first(Page page, String xpath) {
        return page.getHtml().xpath(xpath).toString();
    }

    /** Evaluates {@code xpath} against the page HTML and returns every match. */
    private static List<String> all(Page page, String xpath) {
        return page.getHtml().xpath(xpath).all();
    }

    /** Returns true when at least one of the given values is null, empty or whitespace only. */
    private static boolean anyBlank(String... values) {
        for (String value : values) {
            if (StringUtils.isBlank(value)) {
                return true;
            }
        }
        return false;
    }

    /** Queues the "next page" link, the room links and the filter links found on a listing page. */
    private void queueListingLinks(Page page) {
        String nextPage = first(page, "//div[@class='pages']/a[@class='next']/@href");
        // The last listing page has no "next" anchor, so there may be nothing to queue.
        if (StringUtils.isNotBlank(nextPage)) {
            page.addTargetRequest(nextPage);
        }
        page.addTargetRequests(all(page, "//ul[@id='houseList']/li/div[2]/h3/a/@href"));
        page.addTargetRequests(all(page, "//ul[@class='clearfix filterList']/li/span/a/@href"));
        // The original expression ended with a stray ']' which made the XPath malformed.
        page.addTargetRequests(all(page, "//div[@class='clearfix t']/a/@href"));
    }

    /**
     * Extracts the room described by a detail page and inserts it unless the mapper already
     * knows a record with the same descriptive fields.
     *
     * <p>If a mandatory numeric field cannot be extracted the page is skipped with a message on
     * stderr instead of failing with a NullPointerException/NumberFormatException.
     */
    private void storeRoom(Page page) {
        String title = first(page, "//div[@class='room_name']/h2/text()");
        String link = page.getRequest().getUrl();
        String price = first(page, "/html/body/div[3]/div[2]/div[1]/p/span[2]/span[1]/text()");
        String timeUnit = first(page, "//div[@class='room_name']/p/span[2]/span[2]/regex('.*\\((.+)\\).*',1)");
        String area = first(page, "/html/body/div[3]/div[2]/ul/li[1]/regex('[\\s\\S]*?(\\d+(?:\\.\\d+)?).*㎡.*',1)");
        String rooms = first(page, "/html/body/div[3]/div[2]/ul/li[3]/regex('(\\d+).*',1)");
        String halls = first(page, "/html/body/div[3]/div[2]/ul/li[3]/regex('.*(\\d+).*',1)");
        String rentType = first(page, "//ul[@class='detail_room']/li[3]/span/text()");
        String floorTotal = first(page, "//ul[@class='detail_room']/li[4]/regex('.*/(\\d+).*$',1)");
        String floorLoc = first(page, "//ul[@class='detail_room']/li[4]/regex('(\\d+)/.*',1)");
        String lng = first(page, "//*[@id=\"mapsearchText\"]/@data-lng");
        String lat = first(page, "//*[@id=\"mapsearchText\"]/@data-lat");
        String direction = first(page, "/html/body/div[3]/div[2]/ul/li[2]/text()");
        String confGen = first(page, "/html/body/div[3]/div[2]/p/a/span/regex('.*?(\\d+\\.?\\d*).*',1)");
        String confType = first(page, "/html/body/div[3]/div[2]/p/a/span/regex('.*?\\d+\\.?\\d* *(.*)$',1)");
        String privateBathroom = first(page, "/html/body/div[3]/div[2]/p/span[@class=\"toilet\"]/text()");
        String privateBalcony = first(page, "/html/body/div[3]/div[2]/p/span[@class=\"balcony\"]/text()");
        String nearestSubWayDist = first(page, "//ul[@class='detail_room']/li[5]/span/regex('.*?(\\d+)米.*',1)");
        String district = first(page, "/html/body/div[3]/div[2]/div[1]/p/span[1]/regex('.*?\\[(.+?) .*',1)");

        // Keep only the digits of the price text; null stays null so the check below catches it.
        String priceDigits = price == null ? null : price.replaceAll("[^0-9]", "");
        if (anyBlank(priceDigits, area, rooms, halls, floorTotal, floorLoc, lng, lat)) {
            System.err.println("Skipping " + link + ": a required numeric field could not be extracted");
            return;
        }

        Spider spider = new Spider();
        spider.setTitle(title);
        spider.setLink(link);
        spider.setPrice(Integer.valueOf(priceDigits));
        spider.setTimeUnit(timeUnit);
        spider.setArea(Double.parseDouble(area));
        spider.setRooms(Integer.valueOf(rooms));
        spider.setHalls(Integer.valueOf(halls));
        spider.setRentType(rentType);
        spider.setFloorTotal(Integer.valueOf(floorTotal));
        spider.setFloorloc(Integer.valueOf(floorLoc));
        spider.setLng(Double.parseDouble(lng));
        spider.setLat(Double.parseDouble(lat));
        spider.setDirection(direction);
        spider.setConfgen(confGen);
        // Test the extracted value; the freshly created record's own field is not set yet.
        if (confType != null) {
            spider.setConftype(confType);
        }
        if (privateBathroom == null) {
            spider.setPrivatebathroom(0); // no private bathroom
        } else {
            spider.setPrivatebathroom(1); // has a private bathroom
        }
        // A missing balcony span is treated like the marker text: no private balcony.
        if (privateBalcony == null || NO_BALCONY_MARKER.equals(privateBalcony)) {
            spider.setPrivatebalcony(0); // no private balcony
        } else {
            spider.setPrivatebalcony(1); // has a private balcony
        }
        if (StringUtils.isBlank(nearestSubWayDist)) {
            spider.setNearestsubwaydist(null);
        } else {
            spider.setNearestsubwaydist(Integer.valueOf(nearestSubWayDist));
        }
        spider.setDistrict(district);
        System.out.println(spider);

        // Insert only when the mapper finds no record matching the descriptive fields.
        Spider existing = spiderMapper.selectSpiderByObj(lookupProbeFor(spider));
        if (existing == null) {
            spiderMapper.insertSpider(spider);
        }
    }

    /**
     * Builds the probe object passed to {@code selectSpiderByObj}: a copy of {@code spider}
     * carrying only the fields used to recognise an already stored room (the link and the
     * remaining attributes are deliberately left unset, as in the original code).
     */
    private static Spider lookupProbeFor(Spider spider) {
        Spider probe = new Spider();
        probe.setTitle(spider.getTitle());
        probe.setPrice(spider.getPrice());
        probe.setTimeUnit(spider.getTimeUnit());
        probe.setArea(spider.getArea());
        probe.setRooms(spider.getRooms());
        probe.setHalls(spider.getHalls());
        probe.setRentType(spider.getRentType());
        probe.setFloorTotal(spider.getFloorTotal());
        probe.setFloorloc(spider.getFloorloc());
        probe.setLng(spider.getLng());
        probe.setLat(spider.getLat());
        probe.setDirection(spider.getDirection());
        probe.setConfgen(spider.getConfgen());
        return probe;
    }

}
